#install.packages("rlang")
#install.packages("ggplot2")
#install.packages("tidyverse")
#install.packages("dplyr")
#install.packages("stringr")
#install.packages("lubridate")
#install.packages("tidymodels")
#install.packages("ggmap")
#install.packages("devtools")
#install.packages("devtools")
#devtools::install_github("adror1/nwslR")

library(ggplot2)
library(lubridate)
library(dplyr)
library(tidyr)
library(broom)
library(praise)
library(plotly)
player_season_stats <- nwslR::fieldplayer_overall_season_stats
team_data <- nwslR::team_stats_season
draft_data <- nwslR::draftpicks
adv_player_stats <- nwslR::adv_player_stats 
adv_team_stats <- nwslR::adv_team_stats 
awards <- nwslR::award
franchise <- nwslR::franchise
players <- nwslR::player

Goal Efficiency

player_season_stats <- inner_join(players, player_season_stats, by = "person_id", copy = FALSE) %>%
  select(-nation.y, -pos.y, -name_other)

player_season_stats <- player_season_stats %>%
  rename(nation = nation.x, pos = pos.x)
#gives us goals/min for each season
nwsl_goals_per_min <- player_season_stats %>%
  group_by(season) %>%
  filter(pos == "FW") %>%
  mutate(goals_per_min = gls/min) %>%
  filter(goals_per_min > 0, goals_per_min < .1) 


nwsl_goals_per_min_plot <- nwsl_goals_per_min %>%
  ggplot(aes(x = season, y = goals_per_min)) +
  geom_point(color = "blue") +
  geom_boxplot(color = "red", outliers = FALSE)
## Warning: Ignoring unknown parameters: outliers
ggplotly(nwsl_goals_per_min_plot, tooltip = "player_name")
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?
#gives us an average goals/min over the player's career (2013 - 2019)
nwsl_goals_per_min_career <- nwsl_goals_per_min %>%
  group_by(player) %>%
  summarise(goals_per_min = mean(goals_per_min))

nwsl_goals_per_min_career
## # A tibble: 87 x 2
##    player                 goals_per_min
##  * <chr>                          <dbl>
##  1 Abby Wambach                 0.00719
##  2 Adriana                      0.00251
##  3 Alex Morgan                  0.00480
##  4 Alexa Newfield               0.00304
##  5 Allie Bailey                 0.00794
##  6 Ana-Maria Crnogorčević       0.00243
##  7 Arielle Ship                 0.0129 
##  8 Ashleigh Sykes               0.00101
##  9 Ashley Hatch                 0.00375
## 10 Bethany Balcer               0.00353
## # … with 77 more rows
players %>%
  filter(player == "Carli Lloyd")
## # A tibble: 1 x 5
##   person_id player      nation pos   name_other
##       <dbl> <chr>       <chr>  <chr> <chr>     
## 1        76 Carli Lloyd USA    MF    <NA>
player_season_stats %>%
  filter(player == "Carli Lloyd")
## # A tibble: 7 x 15
##   person_id player nation pos   season team_id    mp starts   min   gls   ast
##       <dbl> <chr>  <chr>  <chr>  <dbl> <chr>   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1        76 Carli… USA    MF      2013 WNY        15     14     8     0    NA
## 2        76 Carli… USA    MF      2014 WNY        19     19  1710     8     5
## 3        76 Carli… USA    MF      2015 HOU        12     12  1080     4     0
## 4        76 Carli… USA    MF      2016 HOU         7      7   553     5     3
## 5        76 Carli… USA    MF      2017 HOU         8      8   647     2     0
## 6        76 Carli… USA    MF      2018 NJ         18     17  1563     4     1
## 7        76 Carli… USA    MF      2019 NJ         14     14  1260     8     1
## # … with 4 more variables: pk <dbl>, p_katt <dbl>, crd_y <dbl>, crd_r <dbl>
player_season_stats %>%
  filter(player == c("Carli Lloyd")) %>%
  ggplot(aes(x = season, y = gls)) + 
  geom_point(color = "red")

name = "Alex Morgan"

player_season_stats %>%
  filter(player == c(name)) %>%
  ggplot(aes(x = season, y = gls)) + 
  geom_point(shape = "star", color = "red")

Goals & Assists

p1 <- player_season_stats %>%
  #group_by(team_id) %>%
  ggplot(aes(x = gls, y = ast)) +
  geom_point(shape = "square", color = "blue") + 
  geom_smooth(color = "red")

ggplotly(p1)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).

There appears to be a weak, positive correlation between goals and assists per season. The correlation grows weaker as the player scores more goals, meaning that they assist less. Let’s look at this by position:

player_season_stats %>%
  group_by(pos) %>%
  summarise(count = n())
## # A tibble: 6 x 2
##   pos   count
## * <chr> <int>
## 1 DF      353
## 2 DF,FW    40
## 3 DF,MF   129
## 4 FW      334
## 5 FW,MF   146
## 6 MF      348
ggplotly(
  player_season_stats %>%
  group_by(pos) %>%
  #mutate(gls = median(gls), ast = median(ast)) %>%#
  ggplot(aes(x =  pos, y = (ast/gls))) +
  geom_boxplot(outlier.shape = NA)
  )
## Warning: Removed 747 rows containing non-finite values (stat_boxplot).

Defenders have the lowest median assists per goal ratio, presumably because they do not usually score nor assist, while forwards have the second lowest, presumably because they score way more than they assist. But, strangely enough, the position category with the highest assists to goal ratio is the category of players who play both Defense and Forward. This is most likely occuring because: players who fill both the DF and FW positions are most likely placed on the outside, the wingers. Therefore, they are probably quick and skilled at crossing the ball, leading to a higher assist ratio!

maxgoals <- team_data %>%
  group_by(team_id) %>%
  group_by(season) %>%
  summarize(max(goals)) %>%
  ggplot(x = year, y = goals)


#ggplot(aes(x = maxgoals[["data"]][["season"]], y = maxgoals[["data"]][["max(goals)"]]))
maxgoals <- team_data %>%
  group_by(team_id) %>%
  summarize(max(goals))
  
maxgoals
## # A tibble: 12 x 2
##    team_id `max(goals)`
##  * <chr>          <int>
##  1 BOS               24
##  2 CHI               41
##  3 HOU               35
##  4 KC                29
##  5 NC                54
##  6 NJ                42
##  7 ORL               45
##  8 POR               40
##  9 SEA               43
## 10 UTA               25
## 11 WAS               30
## 12 WNY               40
all_goals <- team_data %>%
  group_by(season) %>%
  ggplot(aes(x = season, y = goals)) +
  geom_point(color = "red") +
  geom_smooth(color = "black")

ggplotly(all_goals)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 2016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.344e-17
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 2016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 7.344e-17
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0602

There does not appear to be a significant trend in overal goal scorring from 2016 to 2019. 2016, however, 2016 is noticably lower than 2017, 2018, and 2019, which are all very similar in median values.

The Draft

Let’s explore some of the rounds & popular feeder university programs.

top_feeders <- draft_data %>%
  filter(round == 1) %>%
  count(previous_team, sort = TRUE)

top_feeders
##                         previous_team n
## 1                                UCLA 7
## 2                       Florida State 6
## 3                            Stanford 5
## 4                      North Carolina 4
## 5                     Duke University 3
## 6                          Penn State 3
## 7   University of Southern California 3
## 8                            Virginia 3
## 9                             Florida 2
## 10                         Pepperdine 2
## 11                Stanford University 2
## 12                         Texas Tech 2
## 13           West Virginia University 2
## 14  ASA Chesapeake Charge, Penn State 1
## 15                     Boston College 1
## 16 Boston College, Canberra United FC 1
## 17           Brigham Young University 1
## 18                                BYU 1
## 19                         California 1
## 20                            Clemson 1
## 21                 Harvard University 1
## 22                           Illinois 1
## 23                           Kentucky 1
## 24                           Michigan 1
## 25                           NC State 1
## 26                         Notre Dame 1
## 27                     Oklahoma State 1
## 28                           Portland 1
## 29                        Santa Clara 1
## 30             Santa Clara University 1
## 31                         St. John's 1
## 32                          Texas A&M 1
## 33           University of California 1
## 34             University of Colorado 1
## 35              University of Florida 1
## 36             University of Nebraska 1
## 37           University of Notre Dame 1
## 38       University of South Carolina 1
## 39        University of South Florida 1
## 40             University of Virginia 1
## 41            University of Wisconsin 1
## 42                                USC 1
## 43        Washington State University 1

UCLA is the top provider of 1st round NWSL draft picks since 2013, closely followed by FSU and Stanford. Let’s explore the draft as a whole:

draft_colleges <- draft_data %>%
    count(previous_team, sort = TRUE)

draft_colleges
##                             previous_team  n
## 1                                    UCLA 15
## 2                              Penn State 12
## 3                                Stanford 11
## 4                           Florida State 10
## 5                          North Carolina 10
## 6                                Virginia 10
## 7                         Duke University  9
## 8       University of Southern California  7
## 9                                 Florida  6
## 10                 University of Virginia  6
## 11                                   Duke  5
## 12                             Pepperdine  5
## 13                            Santa Clara  5
## 14                 University of Nebraska  5
## 15           University of North Carolina  5
## 16                            Wake Forest  5
## 17                         Boston College  4
## 18               Brigham Young University  4
## 19                                Clemson  4
## 20                             Notre Dame  4
## 21                                Rutgers  4
## 22                              Texas A&M  4
## 23                             Texas Tech  4
## 24                             Georgetown  3
## 25                               Maryland  3
## 26                           Northwestern  3
## 27                               Portland  3
## 28                     Rutgers University  3
## 29                    Stanford University  3
## 30                                    UCF  3
## 31               University of California  3
## 32                 University of Colorado  3
## 33                  University of Florida  3
## 34                                    USC  3
## 35                          West Virginia  3
## 36                          Arizona State  2
## 37                                    BYU  2
## 38                               Colorado  2
## 39                       Colorado College  2
## 40                  Georgetown University  2
## 41                               Illinois  2
## 42                               Kentucky  2
## 43                               Michigan  2
## 44                               Missouri  2
## 45                  Ohio State University  2
## 46     University of California, Berkeley  2
## 47              University of Connecticut  2
## 48                   University of Kansas  2
## 49               University of Washington  2
## 50                University of Wisconsin  2
## 51                          Virginia Tech  2
## 52            Washington State University  2
## 53               West Virginia University  2
## 54                         William & Mary  2
## 55                               Arkansas  1
## 56      ASA Chesapeake Charge, Penn State  1
## 57                      Auburn University  1
## 58                                 Baylor  1
## 59     Boston College, Canberra United FC  1
## 60                          Bowling Green  1
## 61                                 Butler  1
## 62                    Cal State Fullerton  1
## 63                             California  1
## 64                        Central Florida  1
## 65  Chicago Eclipse Select, Florida State  1
## 66       Chicago Eclipse Select, Stanford  1
## 67             D.C. United Women, Florida  1
## 68                                 Dayton  1
## 69                                 Denver  1
## 70                                 DePaul  1
## 71                      DePaul University  1
## 72             Georgia, Lincoln Ladies FC  1
## 73                     Harvard University  1
## 74                                Hofstra  1
## 75                         Illinois State  1
## 76              Illinois State University  1
## 77                          James Madison  1
## 78                                 Kansas  1
## 79                         Loyola–Chicago  1
## 80                                    LSU  1
## 81                              Marquette  1
## 82                   Marquette University  1
## 83                              Minnesota  1
## 84                            Mississippi  1
## 85                      Mississippi State  1
## 86           Mississippi State University  1
## 87                         Missouri State  1
## 88                               NC State  1
## 89                               Nebraska  1
## 90      New York Athletic Club, Princeton  1
## 91                           Northeastern  1
## 92                      Northern Colorado  1
## 93                         Oklahoma State  1
## 94                               Ole Miss  1
## 95                                 Oregon  1
## 96                           Oregon State  1
## 97                  Pepperdine University  1
## 98                   Princeton University  1
## 99                          San Francisco  1
## 100                Santa Clara University  1
## 101                               Seattle  1
## 102                        South Carolina  1
## 103                            St. John's  1
## 104                                   TCU  1
## 105                                 Texas  1
## 106                    UNC, Bayern Munich  1
## 107                 University of Alabama  1
## 108      University of California, Irvine  1
## 109         University of Central Florida  1
## 110                  University of Denver  1
## 111               University of Minnesota  1
## 112              University of Notre Dame  1
## 113          University of South Carolina  1
## 114           University of South Florida  1
## 115                   University of Texas  1
## 116                                  Utah  1
## 117                             Villanova  1
## 118                      Washington State  1
## 119                      William and Mary  1
## 120                             Wisconsin  1
## 121                                  Yale  1
## 122                       Yale University  1

UCLA is again the top contender, but Penn State situates itself in second place’s position, providing 12 NWSL draft picks since 2013.***NOTE: I need to clean this data better before making any substantial statements about it.

Passing Accuracy

adv_player_stats$player_name = paste(adv_player_stats$first_name, adv_player_stats$last_name)
  
head(adv_player_stats$player_name)
## [1] "Michelle Betos"     "Meghan Klingenberg" "Emily Menges"      
## [4] "Emily Sonnett"      "Katherine Reynolds" "Amandine Henry"
adv_player_stats$passing_acc = adv_player_stats$accurate_pass / adv_player_stats$total_pass

head(adv_player_stats$passing_acc)
## [1] 0.8536585 0.8571429 0.9062500 0.8507463 0.7500000 0.8684211
passing_accuracy <- adv_player_stats %>%
  group_by(player_name) %>%
  summarize(avg_passing_acc = mean(passing_acc))

passing_accuracy <- passing_accuracy %>%
  arrange(desc(avg_passing_acc))

passing_accuracy
## # A tibble: 440 x 2
##    player_name               avg_passing_acc
##    <chr>                               <dbl>
##  1 Lisa De Vanna                       0.844
##  2 Samantha Staab                      0.824
##  3 Hope Solo                           0.817
##  4 Kim Little                          0.813
##  5 Kendall Lorraine Fletcher           0.807
##  6 Keelin Winters                      0.800
##  7 Andi Sullivan                       0.800
##  8 Veronica Boquete                    0.793
##  9 Linda Motlhalo                      0.791
## 10 Emily Menges                        0.789
## # … with 430 more rows

The NWSL players with the statistically highest passing accuracy are: 1) Lisa De Vanna, 2) Samantha Staab, and 3) Hope Solo. Let’s check them out:

player_name_and_id <- data_frame(players$person_id, players$player)
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
player_name_and_id
## # A tibble: 555 x 2
##    `players$person_id` `players$player`
##                  <dbl> <chr>           
##  1                 342 Marisa Abegg    
##  2                 117 Danesha Adams   
##  3                   6 Adriana         
##  4                 300 Leigh Ann Brown 
##  5                 202 Jazmyne Avant   
##  6                  28 Amy Barczuk     
##  7                 290 Lauren Barnes   
##  8                  56 Brittany Bock   
##  9                 313 Liz Bogus       
## 10                 363 Melanie Booth   
## # … with 545 more rows
player_name_and_id <- player_name_and_id %>%
  rename(person_id = "players$person_id", player = "players$player")
player_season_stats <- full_join(player_name_and_id, player_season_stats, by = "person_id")
player_season_stats %>%
  filter(player.x == "Lisa De Vanna")
## # A tibble: 4 x 16
##   person_id player.x player.y nation pos   season team_id    mp starts   min
##       <dbl> <chr>    <chr>    <chr>  <chr>  <dbl> <chr>   <dbl>  <dbl> <dbl>
## 1       311 Lisa De… Lisa De… AUS    FW      2013 NJ         16     15    NA
## 2       311 Lisa De… Lisa De… AUS    FW      2014 WAS        11      9   794
## 3       311 Lisa De… Lisa De… AUS    FW      2014 BOS         6      5   448
## 4       311 Lisa De… Lisa De… AUS    FW      2016 ORL         3      2   153
## # … with 6 more variables: gls <dbl>, ast <dbl>, pk <dbl>, p_katt <dbl>,
## #   crd_y <dbl>, crd_r <dbl>
adv_player_stats %>%
  filter(player_name == "Samantha Staab")
## # A tibble: 24 x 269
##    game_id status team_id first_name last_name person_id shirt_number position
##    <chr>   <chr>  <chr>   <chr>      <chr>         <dbl>        <dbl> <chr>   
##  1 portla… away   WAS     Samantha   Staab           444            3 Defender
##  2 washin… home   WAS     Samantha   Staab           444            3 Defender
##  3 housto… away   WAS     Samantha   Staab           444            3 Defender
##  4 chicag… away   WAS     Samantha   Staab           444            3 Defender
##  5 washin… home   WAS     Samantha   Staab           444            3 Defender
##  6 orland… away   WAS     Samantha   Staab           444            3 Defender
##  7 washin… home   WAS     Samantha   Staab           444            3 Defender
##  8 washin… home   WAS     Samantha   Staab           444            3 Defender
##  9 portla… away   WAS     Samantha   Staab           444            3 Defender
## 10 washin… home   WAS     Samantha   Staab           444            3 Defender
## # … with 14 more rows, and 261 more variables: position_side <chr>,
## #   game_started <dbl>, mins_played <dbl>, formation_place <dbl>,
## #   total_sub_on <dbl>, total_sub_off <dbl>, player_off_id <chr>,
## #   player_on_id <chr>, sub_position <chr>, leftside_pass <dbl>,
## #   accurate_keeper_sweeper <dbl>, accurate_pass <dbl>,
## #   total_final_third_passes <dbl>, rightside_pass <dbl>,
## #   attempts_conceded_ibox <dbl>, touches <dbl>, total_fwd_zone_pass <dbl>,
## #   keeper_pick_up <dbl>, att_assist_openplay <dbl>,
## #   accurate_fwd_zone_pass <dbl>, saves <dbl>, attempts_conceded_obox <dbl>,
## #   ball_recovery <dbl>, turnover <dbl>, poss_won_def_3_rd <dbl>,
## #   accurate_back_zone_pass <dbl>, successful_open_play_pass <dbl>,
## #   total_back_zone_pass <dbl>, total_long_balls <dbl>,
## #   accurate_keeper_throws <dbl>, goal_kicks <dbl>, open_play_pass <dbl>,
## #   total_pass <dbl>, total_launches <dbl>, fwd_pass <dbl>,
## #   ontarget_att_assist <dbl>, long_pass_own_to_opp <dbl>,
## #   total_keeper_sweeper <dbl>, successful_final_third_passes <dbl>,
## #   keeper_throws <dbl>, accurate_launches <dbl>, poss_lost_all <dbl>,
## #   accurate_long_balls <dbl>, clean_sheet <dbl>, accurate_goal_kicks <dbl>,
## #   saved_obox <dbl>, unsuccessful_touch <dbl>, poss_lost_ctrl <dbl>,
## #   final_third_entries <dbl>, long_pass_own_to_opp_success <dbl>,
## #   total_att_assist <dbl>, shot_faced <dbl>, duel_lost <dbl>,
## #   blocked_scoring_att <dbl>, poss_won_att_3_rd <dbl>, dispossessed <dbl>,
## #   accurate_cross <dbl>, att_rf_total <dbl>, won_tackle <dbl>,
## #   total_chipped_pass <dbl>, lost_corners <dbl>, total_scoring_att <dbl>,
## #   total_throws <dbl>, att_obx_centre <dbl>, att_openplay <dbl>,
## #   poss_won_mid_3_rd <dbl>, freekick_cross <dbl>, touches_in_opp_box <dbl>,
## #   accurate_chipped_pass <dbl>, duel_won <dbl>, total_cross_nocorner <dbl>,
## #   total_tackle <dbl>, passes_left <dbl>, total_cross <dbl>,
## #   att_obox_blocked <dbl>, head_pass <dbl>, crosses_18_yard <dbl>,
## #   accurate_cross_nocorner <dbl>, effective_clearance <dbl>,
## #   won_corners <dbl>, interception <dbl>, attempted_tackle_foul <dbl>,
## #   backward_pass <dbl>, interception_won <dbl>, pen_area_entries <dbl>,
## #   accurate_throws <dbl>, fouls <dbl>, total_clearance <dbl>,
## #   crosses_18_yardplus <dbl>, total_shots <dbl>,
## #   effective_blocked_cross <dbl>, outfielder_block <dbl>, blocked_cross <dbl>,
## #   shield_ball_oop <dbl>, offside_provoked <dbl>, ontarget_scoring_att <dbl>,
## #   passes_right <dbl>, att_ibox_target <dbl>, att_bx_centre <dbl>,
## #   att_sv_low_centre <dbl>, …
adv_player_stats %>%
  filter(player_name == "Hope Solo")
## # A tibble: 8 x 269
##   game_id status team_id first_name last_name person_id shirt_number position
##   <chr>   <chr>  <chr>   <chr>      <chr>         <dbl>        <dbl> <chr>   
## 1 seattl… home   SEA     Hope       Solo          10022            1 Goalkee…
## 2 sky-bl… away   SEA     Hope       Solo          10022            1 Goalkee…
## 3 seattl… home   SEA     Hope       Solo          10022            1 Goalkee…
## 4 seattl… home   SEA     Hope       Solo          10022            1 Goalkee…
## 5 boston… away   SEA     Hope       Solo          10022            1 Goalkee…
## 6 orland… away   SEA     Hope       Solo          10022            1 Goalkee…
## 7 kansas… away   SEA     Hope       Solo          10022            1 Goalkee…
## 8 seattl… home   SEA     Hope       Solo          10022            1 Goalkee…
## # … with 261 more variables: position_side <chr>, game_started <dbl>,
## #   mins_played <dbl>, formation_place <dbl>, total_sub_on <dbl>,
## #   total_sub_off <dbl>, player_off_id <chr>, player_on_id <chr>,
## #   sub_position <chr>, leftside_pass <dbl>, accurate_keeper_sweeper <dbl>,
## #   accurate_pass <dbl>, total_final_third_passes <dbl>, rightside_pass <dbl>,
## #   attempts_conceded_ibox <dbl>, touches <dbl>, total_fwd_zone_pass <dbl>,
## #   keeper_pick_up <dbl>, att_assist_openplay <dbl>,
## #   accurate_fwd_zone_pass <dbl>, saves <dbl>, attempts_conceded_obox <dbl>,
## #   ball_recovery <dbl>, turnover <dbl>, poss_won_def_3_rd <dbl>,
## #   accurate_back_zone_pass <dbl>, successful_open_play_pass <dbl>,
## #   total_back_zone_pass <dbl>, total_long_balls <dbl>,
## #   accurate_keeper_throws <dbl>, goal_kicks <dbl>, open_play_pass <dbl>,
## #   total_pass <dbl>, total_launches <dbl>, fwd_pass <dbl>,
## #   ontarget_att_assist <dbl>, long_pass_own_to_opp <dbl>,
## #   total_keeper_sweeper <dbl>, successful_final_third_passes <dbl>,
## #   keeper_throws <dbl>, accurate_launches <dbl>, poss_lost_all <dbl>,
## #   accurate_long_balls <dbl>, clean_sheet <dbl>, accurate_goal_kicks <dbl>,
## #   saved_obox <dbl>, unsuccessful_touch <dbl>, poss_lost_ctrl <dbl>,
## #   final_third_entries <dbl>, long_pass_own_to_opp_success <dbl>,
## #   total_att_assist <dbl>, shot_faced <dbl>, duel_lost <dbl>,
## #   blocked_scoring_att <dbl>, poss_won_att_3_rd <dbl>, dispossessed <dbl>,
## #   accurate_cross <dbl>, att_rf_total <dbl>, won_tackle <dbl>,
## #   total_chipped_pass <dbl>, lost_corners <dbl>, total_scoring_att <dbl>,
## #   total_throws <dbl>, att_obx_centre <dbl>, att_openplay <dbl>,
## #   poss_won_mid_3_rd <dbl>, freekick_cross <dbl>, touches_in_opp_box <dbl>,
## #   accurate_chipped_pass <dbl>, duel_won <dbl>, total_cross_nocorner <dbl>,
## #   total_tackle <dbl>, passes_left <dbl>, total_cross <dbl>,
## #   att_obox_blocked <dbl>, head_pass <dbl>, crosses_18_yard <dbl>,
## #   accurate_cross_nocorner <dbl>, effective_clearance <dbl>,
## #   won_corners <dbl>, interception <dbl>, attempted_tackle_foul <dbl>,
## #   backward_pass <dbl>, interception_won <dbl>, pen_area_entries <dbl>,
## #   accurate_throws <dbl>, fouls <dbl>, total_clearance <dbl>,
## #   crosses_18_yardplus <dbl>, total_shots <dbl>,
## #   effective_blocked_cross <dbl>, outfielder_block <dbl>, blocked_cross <dbl>,
## #   shield_ball_oop <dbl>, offside_provoked <dbl>, ontarget_scoring_att <dbl>,
## #   passes_right <dbl>, att_ibox_target <dbl>, att_bx_centre <dbl>,
## #   att_sv_low_centre <dbl>, …

After some further investigation, we find that De Vanna has only played in 3 games, rendering her passing accuracy score less than completely picturesque of her footballer skills. Although her accuracy for those 3 games in 2016 is certanly notable, it is not entirely fair to compare them again Staab’s 24, or even Solo’s 8. I might place a filter on number of entries so we can compare passing accuracy across players that have played many games and therfore have had their passing tested under many circumstances.

Mallory Pugh Comparison

USWNT FWs passing accuracies

library(stringr)

# get the date into its own columns by year, month, day
adv_player_stats$date <- 
  str_sub(adv_player_stats$game_id,-10, -1)

adv_player_stats <- adv_player_stats %>%
  separate(date, c("year", "month", "day"), "-")
# mallory pugh
mallory_pugh_passacc <- adv_player_stats %>%
  filter(player_name == "Mallory Pugh") %>%
  drop_na(passing_acc) %>%
  summarize(passing_acc, player_name, year)

# sophia smith not in this NWSL database?
sophia_smith_passacc <- adv_player_stats %>%
  filter(player_name == "Sophia Smith") %>%
  drop_na(passing_acc) %>%
  summarize(passing_acc, player_name, year)

# lynn williams
lynn_williams_passacc <- adv_player_stats %>%
  filter(player_name == "Lynn Williams") %>%
  drop_na(passing_acc) %>%
  summarize(passing_acc, player_name, year)


# megan rapinoe
megan_rapinoe_passacc <- adv_player_stats %>%
  filter(player_name == "Megan Rapinoe") %>%
  drop_na(passing_acc) %>%
  summarize(passing_acc, player_name, year)

# carli lloyd
carli_lloyd_passacc <- adv_player_stats %>%
  filter(player_name == "Carli Lloyd") %>%
  drop_na(passing_acc) %>%
  summarize(passing_acc, player_name, year)
uswnt_fw_passacc <- adv_player_stats %>%
  filter(player_name == c("Mallory Pugh", "Lynn Williams", "Carli Lloyd", "Megan Rapinoe")) %>%
  drop_na(passing_acc) %>%
  summarize(passing_acc, player_name, year)
  
uswnt_fw_passacc
## # A tibble: 60 x 3
##    passing_acc player_name   year 
##          <dbl> <chr>         <chr>
##  1       0.743 Carli Lloyd   2016 
##  2       0.5   Lynn Williams 2016 
##  3       0.5   Lynn Williams 2016 
##  4       0.727 Carli Lloyd   2016 
##  5       0.75  Megan Rapinoe 2016 
##  6       0.5   Lynn Williams 2016 
##  7       0.933 Carli Lloyd   2016 
##  8       0.639 Megan Rapinoe 2016 
##  9       0.542 Lynn Williams 2016 
## 10       0.429 Lynn Williams 2016 
## # … with 50 more rows
write.csv(uswnt_fw_passacc, "uswnt_fw_passacc.csv")

USWNT FWs scoring frequencies

megan_rapinoe_season_goals_per_min <- player_season_stats %>%
  group_by(season) %>%
  filter(player.x == c("Megan Rapinoe")) %>%
  mutate(goals_per_min = gls/min) %>%
  summarize(player.x, season, goals_per_min)

carli_lloyd_season_goals_per_min <- player_season_stats %>%
  group_by(season) %>%
  filter(player.x == c("Carli Lloyd")) %>%
  mutate(goals_per_min = gls/min) %>%
  summarize(player.x, season, goals_per_min)

lynn_williams_season_goals_per_min <- player_season_stats %>%
  group_by(season) %>%
  filter(player.x == c("Lynn Williams")) %>%
  mutate(goals_per_min = gls/min) %>%
  summarize(player.x, season, goals_per_min)

mal_pugh_season_goals_per_min <- player_season_stats %>%
  group_by(season) %>%
  filter(player.x == c("Mallory Pugh")) %>%
  mutate(goals_per_min = gls/min) %>%
  summarize(player.x, season, goals_per_min)


uswnt_fw_goals_per_min <- rbind(megan_rapinoe_season_goals_per_min, carli_lloyd_season_goals_per_min, lynn_williams_season_goals_per_min, mal_pugh_season_goals_per_min, by = "season")

write.csv(uswnt_fw_goals_per_min, "uswnt_fw_season_goals_per_min.csv")

USWNT FWs Final Third Passes

uswnt_fw_final_third_passes <- adv_player_stats %>%
  filter(player_name == c("Mallory Pugh", "Lynn Williams", "Carli Lloyd", "Megan Rapinoe")) %>%
  group_by(player_name) %>%
  summarise(mean(successful_final_third_passes))
  
uswnt_fw_final_third_passes
## # A tibble: 4 x 2
##   player_name   `mean(successful_final_third_passes)`
## * <chr>                                         <dbl>
## 1 Carli Lloyd                                   10.2 
## 2 Lynn Williams                                  6.07
## 3 Mallory Pugh                                   8.29
## 4 Megan Rapinoe                                 10.6

USWNT FW Summary

uswnt_fw_nswl_stats <- adv_player_stats %>%
  filter(player_name == c("Mallory Pugh", "Carli Lloyd", "Lynn Williams", "Megan Rapinoe")) %>%
  mutate(forward_zone_pass_acc = accurate_fwd_zone_pass/total_fwd_zone_pass) %>%
  group_by(player_name) %>%
  summarise(player_name, year, mins_played, goals, big_chance_missed, goal_assist, total_offside, forward_zone_pass_acc, turnover, ontarget_att_assist, ontarget_scoring_att, successful_final_third_passes, leftside_pass, rightside_pass)




avg_uswnt_fw_nswl_stats <- adv_player_stats %>%
  filter(player_name == c("Mallory Pugh", "Carli Lloyd", "Lynn Williams", "Megan Rapinoe")) %>%
  mutate(forward_zone_pass_acc = accurate_fwd_zone_pass/total_fwd_zone_pass) %>%
  group_by(year) %>%
  group_by(player_name) %>%
  mutate(avg_mins_played = mean(mins_played)) %>%
  #mutate(avg_forward_zone_pass_acc = mean(forward_zone_pass_acc)) %>%
  mutate(avg_turnover = mean(turnover)) %>%
  mutate(avg_goals = mean(goals)) %>%
  mutate(avg_big_chance_missed = mean(big_chance_missed)) %>%
  mutate(avg_goal_assist = mean(goal_assist)) %>%
  mutate(avg_total_offside = mean(total_offside)) %>%
  mutate(avg_ontarget_att_assist = mean(ontarget_att_assist)) %>%
  mutate(avg_ontarget_scoring_att = mean(ontarget_scoring_att)) %>%
  mutate(avg_successful_final_third_passes = mean(successful_final_third_passes)) %>%
  mutate(avg_left_pass = mean(leftside_pass)) %>%
  mutate(avg_right_pass = mean(rightside_pass)) %>%
  summarise(player_name, year, avg_mins_played, avg_turnover, avg_goals,avg_big_chance_missed , avg_goal_assist, avg_total_offside,  avg_ontarget_att_assist, avg_ontarget_scoring_att, avg_successful_final_third_passes, avg_left_pass, avg_right_pass)

avg_uswnt_fw_nswl_stats
## # A tibble: 54 x 13
## # Groups:   player_name [4]
##    player_name year  avg_mins_played avg_turnover avg_goals avg_big_chance_…
##    <chr>       <chr>           <dbl>        <dbl>     <dbl>            <dbl>
##  1 Carli Lloyd 2016               85         2.09     0.364            0.455
##  2 Carli Lloyd 2017               85         2.09     0.364            0.455
##  3 Carli Lloyd 2018               85         2.09     0.364            0.455
##  4 Carli Lloyd 2018               85         2.09     0.364            0.455
##  5 Carli Lloyd 2018               85         2.09     0.364            0.455
##  6 Carli Lloyd 2018               85         2.09     0.364            0.455
##  7 Carli Lloyd 2018               85         2.09     0.364            0.455
##  8 Carli Lloyd 2018               85         2.09     0.364            0.455
##  9 Carli Lloyd 2018               85         2.09     0.364            0.455
## 10 Carli Lloyd 2019               85         2.09     0.364            0.455
## # … with 44 more rows, and 7 more variables: avg_goal_assist <dbl>,
## #   avg_total_offside <dbl>, avg_ontarget_att_assist <dbl>,
## #   avg_ontarget_scoring_att <dbl>, avg_successful_final_third_passes <dbl>,
## #   avg_left_pass <dbl>, avg_right_pass <dbl>
write.csv(uswnt_fw_nswl_stats, "uswnt_fw_nswl_stats.csv")
megan_rapinoe_annual_nswl_stats <- adv_player_stats %>%
  filter(player_name == c("Megan Rapinoe")) %>%
  mutate(forward_zone_pass_acc = accurate_fwd_zone_pass/total_fwd_zone_pass) %>%
  group_by(year) %>%
  mutate(avg_mins_played = mean(mins_played)) %>%
  #mutate(avg_forward_zone_pass_acc = mean(forward_zone_pass_acc)) %>%
  mutate(avg_turnover = mean(turnover)) %>%
  mutate(avg_goals = mean(goals)) %>%
  mutate(avg_big_chance_missed = mean(big_chance_missed)) %>%
  mutate(avg_goal_assist = mean(goal_assist)) %>%
  mutate(avg_total_offside = mean(total_offside)) %>%
  mutate(avg_ontarget_att_assist = mean(ontarget_att_assist)) %>%
  mutate(avg_ontarget_scoring_att = mean(ontarget_scoring_att)) %>%
  mutate(avg_successful_final_third_passes = mean(successful_final_third_passes)) %>%
  mutate(avg_left_pass = mean(leftside_pass)) %>%
  mutate(avg_right_pass = mean(rightside_pass)) %>%
  
  summarise(year, avg_mins_played, avg_turnover, avg_goals,avg_big_chance_missed , avg_goal_assist, avg_total_offside,  avg_ontarget_att_assist, avg_ontarget_scoring_att, avg_successful_final_third_passes, avg_left_pass, avg_right_pass)


megan_rapinoe_annual_nswl_stats <- distinct(megan_rapinoe_annual_nswl_stats)

megan_rapinoe_annual_nswl_stats
## # A tibble: 4 x 12
## # Groups:   year [4]
##   year  avg_mins_played avg_turnover avg_goals avg_big_chance_… avg_goal_assist
##   <chr>           <dbl>        <dbl>     <dbl>            <dbl>           <dbl>
## 1 2016             41           1        0.2             0               0.2   
## 2 2017             88.8         2.47     0.706           0.0588          0.0588
## 3 2018             82.6         2.88     0.412           0.176           0.353 
## 4 2019             75.3         3.17     0               0.333           0     
## # … with 6 more variables: avg_total_offside <dbl>,
## #   avg_ontarget_att_assist <dbl>, avg_ontarget_scoring_att <dbl>,
## #   avg_successful_final_third_passes <dbl>, avg_left_pass <dbl>,
## #   avg_right_pass <dbl>
write.csv(megan_rapinoe_annual_nswl_stats,"FW/megan_rapinoe_annual_nswl_stats.csv")

NWSL FW Summary

# 
starters_fw_nswl_stats <- adv_player_stats %>%
  filter(position == c("Striker")) %>%
  mutate(forward_zone_pass_acc = accurate_fwd_zone_pass/total_fwd_zone_pass) %>%
  group_by(year) %>%
  mutate(avg_mins_played = mean(mins_played)) %>%
  #mutate(avg_forward_zone_pass_acc = mean(forward_zone_pass_acc)) %>%
  mutate(avg_turnover = mean(turnover)) %>%
  mutate(avg_goals = mean(goals)) %>%
  mutate(avg_big_chance_missed = mean(big_chance_missed)) %>%
  mutate(avg_goal_assist = mean(goal_assist)) %>%
  mutate(avg_total_offside = mean(total_offside)) %>%
  mutate(avg_ontarget_att_assist = mean(ontarget_att_assist)) %>%
  mutate(avg_ontarget_scoring_att = mean(ontarget_scoring_att)) %>%
  mutate(avg_successful_final_third_passes = mean(successful_final_third_passes)) %>%
  mutate(avg_left_pass = mean(leftside_pass)) %>%
  mutate(avg_right_pass = mean(rightside_pass)) %>%
  
  summarise(mean(avg_mins_played), mean(avg_turnover), mean(avg_goals), mean(avg_big_chance_missed) , mean(avg_goal_assist), mean(avg_total_offside),  mean(avg_ontarget_att_assist), mean(avg_ontarget_scoring_att), mean(avg_successful_final_third_passes), mean(avg_left_pass), mean(avg_right_pass))

#distinct(fw_nswl_stats)

write.csv(distinct(starters_fw_nswl_stats), "starters_avg_fw_nwsl_stats.csv")



starters_fw_nswl_stats 
## # A tibble: 4 x 12
##   year  `mean(avg_mins_… `mean(avg_turno… `mean(avg_goals… `mean(avg_big_c…
## * <chr>            <dbl>            <dbl>            <dbl>            <dbl>
## 1 2016              81.7             1.84            0.272            0.127
## 2 2017              81.6             2.58            0.298            0.229
## 3 2018              81.3             2.81            0.271            0.409
## 4 2019              81.7             2.68            0.298            0.375
## # … with 7 more variables: `mean(avg_goal_assist)` <dbl>,
## #   `mean(avg_total_offside)` <dbl>, `mean(avg_ontarget_att_assist)` <dbl>,
## #   `mean(avg_ontarget_scoring_att)` <dbl>,
## #   `mean(avg_successful_final_third_passes)` <dbl>,
## #   `mean(avg_left_pass)` <dbl>, `mean(avg_right_pass)` <dbl>
year_by_year_passing_accuracy <- adv_player_stats %>%
  summarize(player_name, passing_acc, year)

year_by_year_passing_accuracy
## # A tibble: 15,696 x 3
##    player_name        passing_acc year 
##    <chr>                    <dbl> <chr>
##  1 Michelle Betos           0.854 2016 
##  2 Meghan Klingenberg       0.857 2016 
##  3 Emily Menges             0.906 2016 
##  4 Emily Sonnett            0.851 2016 
##  5 Katherine Reynolds       0.75  2016 
##  6 Amandine Henry           0.868 2016 
##  7 Allie Long               0.910 2016 
##  8 Tobin Heath              0.889 2016 
##  9 Lindsey Horan            0.896 2016 
## 10 Nadia Nadim              0.524 2016 
## # … with 15,686 more rows
mal_pugh_pass_acc <- year_by_year_passing_accuracy %>%
  group_by(year) %>%
  filter(player_name == "Mallory Pugh") %>%
  ggplot(aes(x = year, y = passing_acc)) +
  geom_point(color = "red", shape = "star") +
  geom_boxplot(color = "blue") +
  ylab("passing accuracy") + 
  ggtitle("Mallory Pugh's NWSL Passing Accuracy")


ggplotly(mal_pugh_pass_acc)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
ggplotly(
  year_by_year_passing_accuracy %>%
  group_by(year) %>%
  filter(player_name == "Carli Lloyd") %>%
  ggplot(aes(x = year, y = passing_acc)) +
  geom_point(color = "red", shape = "star") +
  geom_boxplot(color = "blue") +
  ylab("passing accuracy") + 
  ggtitle("Carli Lloyd's NWSL Passing Accuracy")
)
ggplotly(
  year_by_year_passing_accuracy %>%
  group_by(year) %>%
  filter(player_name == "Rose Lavelle") %>%
  ggplot(aes(x = year, y = passing_acc)) +
  geom_point(color = "red", shape = "star") +
  geom_boxplot(color = "blue") +
  ylab("passing accuracy") + 
  ggtitle("Rose Lavelle's NWSL Passing Accuracy")
)
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
ggplotly(
  year_by_year_passing_accuracy %>%
  group_by(year) %>%
  filter(player_name == "Alex Morgan") %>%
  ggplot(aes(x = year, y = passing_acc)) +
  geom_point(color = "red", shape = "star") +
  geom_boxplot(color = "blue") +
  ylab("passing accuracy") + 
  ggtitle("Alex Morgan's NWSL Passing Accuracy")
)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
year_by_year_passing_accuracy %>%
  filter(player_name == "Christen Press") %>%
  group_by(year) %>%
  summarize(mean(passing_acc))
## # A tibble: 4 x 2
##   year  `mean(passing_acc)`
## * <chr>               <dbl>
## 1 2016                0.690
## 2 2017                0.715
## 3 2018                0.658
## 4 2019                0.722

Goalkeepers

alyssa_naeher <- adv_player_stats %>%
  filter(player_name == "Alyssa Naeher")

hope_solo <- adv_player_stats %>%
  filter(player_name == "Hope Solo")

ashlyn_harris <- adv_player_stats %>%
  filter(player_name == "Ashlyn Harris")

adrianna_franch <- adv_player_stats %>%
  filter(player_name == "Adrianna Franch")